bitkeeper revision 1.653.1.4 (3fe5ac0esxJ46xgoeERN1TvSw4953g)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Sun, 21 Dec 2003 14:19:58 +0000 (14:19 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Sun, 21 Dec 2003 14:19:58 +0000 (14:19 +0000)
dev.c, xen_block.c, memory.c:
  Various fixes for new page-management world.

xen/common/memory.c
xen/drivers/block/xen_block.c
xen/net/dev.c

index c2b4ee9f7ad0d6137fa996b28c7d16a487d21ea4..aeddc3ffe26ccc5e062ba5618db8547c9e3a2e01 100644 (file)
@@ -172,7 +172,6 @@ unsigned int free_pfns;
 static struct {
 #define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
 #define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
-#define DOP_RESTORE_CR0 (1<<2) /* Set the WP bit in CR0.         */
     unsigned long flags;
     unsigned long cr0;
 } deferred_op[NR_CPUS] __cacheline_aligned;
@@ -316,7 +315,7 @@ static int get_page_from_pagenr(unsigned long page_nr)
     }
 
     if ( unlikely(!get_page(page, current)) &&
-         ((current->domain != 0) || !dom0_get_page(page)) )
+         unlikely((current->domain != 0) || !dom0_get_page(page)) )
     {
         MEM_LOG("Could not get page reference for pfn %08lx\n", page_nr);
         return 0;
@@ -372,12 +371,10 @@ static int get_page_from_l1e(l1_pgentry_t l1e)
 {
     ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);
 
-    if ( unlikely((l1_pgentry_val(l1e) &
-                   (_PAGE_GLOBAL|_PAGE_PAT))) )
+    if ( unlikely((l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT))) )
     {
         MEM_LOG("Bad L1 page type settings %04lx",
-                l1_pgentry_val(l1e) &
-                (_PAGE_GLOBAL|_PAGE_PAT));
+                l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT));
         return 0;
     }
 
@@ -388,14 +385,10 @@ static int get_page_from_l1e(l1_pgentry_t l1e)
             return 0;
         set_bit(_PGC_tlb_flush_on_type_change, 
                 &frame_table[l1_pgentry_to_pagenr(l1e)].count_and_flags);
-    }
-    else
-    {
-        if ( unlikely(!get_page_from_pagenr(l1_pgentry_to_pagenr(l1e))) )
-            return 0;
+        return 1;
     }
 
-    return 1;
+    return get_page_from_pagenr(l1_pgentry_to_pagenr(l1e));
 }
 
 
@@ -412,9 +405,8 @@ static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
     }
 
     if ( unlikely(!get_page_and_type_from_pagenr(
-        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) &&
-         unlikely(!check_linear_pagetable(l2e, pfn)) )
-        return 0;
+        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) )
+        return check_linear_pagetable(l2e, pfn);
 
     return 1;
 }
@@ -422,12 +414,10 @@ static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
 
 static void put_page_from_l1e(l1_pgentry_t l1e)
 {
-    struct pfn_info *page;
+    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
 
     ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);
 
-    page = &frame_table[l1_pgentry_to_pagenr(l1e)];
-
     if ( l1_pgentry_val(l1e) & _PAGE_RW )
     {
         put_page_and_type(page);
@@ -613,34 +603,30 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
     if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
     {
         /* Differ in mapping (bits 12-31) or presence (bit 0)? */
-        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) != 0 )
-        {
-            if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
-                return 0;
-
-            if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
-            {
-                put_page_from_l2e(nl2e, pfn);
-                return 0;
-            }
+        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
+            return update_l2e(pl2e, ol2e, nl2e);
 
-            if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
-                put_page_from_l2e(ol2e, pfn);
-        }
-        else if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
-        {
+        if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
             return 0;
-        }
-    }
-    else
-    {
+        
         if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
+        {
+            put_page_from_l2e(nl2e, pfn);
             return 0;
-
+        }
+        
         if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
             put_page_from_l2e(ol2e, pfn);
+        
+        return 1;
     }
-    
+
+    if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
+        return 0;
+
+    if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
+        put_page_from_l2e(ol2e, pfn);
+
     return 1;
 }
 
@@ -652,26 +638,15 @@ static inline int update_l1e(l1_pgentry_t *pl1e,
     unsigned long o = l1_pgentry_val(ol1e);
     unsigned long n = l1_pgentry_val(nl1e);
 
-    while ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
+         unlikely(o != l1_pgentry_val(ol1e)) )
     {
-        unsigned int cpu = smp_processor_id();
-        /* The CMPXCHG faulted -- maybe we need to clear the WP bit. */
-        if ( deferred_op[cpu].flags & DOP_RESTORE_CR0 )
-        {
-            MEM_LOG("cmpxchg fault despite WP bit cleared\n");
-            return 0;
-        }
-        deferred_op[cpu].cr0 = read_cr0();
-        write_cr0(deferred_op[cpu].cr0 & ~X86_CR0_WP);
-        deferred_op[cpu].flags |= DOP_RESTORE_CR0;
-    }
-
-    if ( o != l1_pgentry_val(ol1e))
         MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
                 l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
+        return 0;
+    }
 
-    /* The swap was successful if the old value we saw is equal to ol1e. */
-    return (o == l1_pgentry_val(ol1e));
+    return 1;
 }
 
 
@@ -691,38 +666,31 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
 
     if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
     {
-        /*
-         * Differ in mapping (bits 12-31), writeable (bit 1), or
-         * presence (bit 0)?
-         */
-        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) != 0 )
-        {
-            if ( unlikely(!get_page_from_l1e(nl1e)) )
-                return 0;
+        /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */
+        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 )
+            return update_l1e(pl1e, ol1e, nl1e);
 
-            if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
-            {
-                put_page_from_l1e(nl1e);
-                return 0;
-            }
-
-            if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
-                put_page_from_l1e(ol1e);
-        }
-        else if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
-        {
+        if ( unlikely(!get_page_from_l1e(nl1e)) )
             return 0;
-        }
-    }
-    else 
-    {
+        
         if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        {
+            put_page_from_l1e(nl1e);
             return 0;
-
+        }
+        
         if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
             put_page_from_l1e(ol1e);
+        
+        return 1;
     }
 
+    if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        return 0;
+    
+    if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
+        put_page_from_l1e(ol1e);
+
     return 1;
 }
 
@@ -738,12 +706,16 @@ int alloc_page_type(struct pfn_info *page, unsigned int type)
          * NB. 'p' may no longer be valid by time we dereference it, so
          * p->processor might be garbage. We clamp it, just in case.
          */
-        if ( !test_bit(_PGC_zombie, &page->count_and_flags) &&
-             unlikely(NEED_FLUSH(tlbflush_time[(p->processor)&(NR_CPUS-1)], 
-                                 page->tlbflush_timestamp)) )
+        if ( likely(!test_bit(_PGC_zombie, &page->count_and_flags)) )
         {
-            perfc_incr(need_flush_tlb_flush);
-            flush_tlb_cpu(p->processor);
+            unsigned int cpu = p->processor;
+            if ( likely(cpu <= smp_num_cpus) &&
+                 unlikely(NEED_FLUSH(tlbflush_time[cpu],
+                                     page->tlbflush_timestamp)) )
+            {
+                perfc_incr(need_flush_tlb_flush);
+                flush_tlb_cpu(cpu);
+            }
         }
     }
 
@@ -1053,9 +1025,6 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
     if ( flags & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
 
-    if ( unlikely(flags & DOP_RESTORE_CR0) )
-        write_cr0(deferred_op[cpu].cr0);
-
     return rc;
 }
 
@@ -1087,9 +1056,6 @@ int do_update_va_mapping(unsigned long page_nr,
 
     if ( unlikely(defer_flags & DOP_RELOAD_LDT) )
         (void)map_ldt_shadow_page(0);
-
-    if ( unlikely(defer_flags & DOP_RESTORE_CR0) )
-        write_cr0(deferred_op[cpu].cr0);
-
+    
     return err;
 }
index 8b1cb119e6e0fb5e4fe034ac98b3534b78a9cab2..878420a045b79e17250a298cef9667cae1df6961 100644 (file)
@@ -433,7 +433,8 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
 
     /* Check that number of segments is sane. */
-    if ( (req->nr_segments == 0) || (req->nr_segments > MAX_BLK_SEGS) )
+    if ( unlikely(req->nr_segments == 0) || 
+         unlikely(req->nr_segments > MAX_BLK_SEGS) )
     {
         DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
         goto bad_descriptor;
@@ -450,18 +451,12 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
         buffer   = req->buffer_and_sects[i] & ~0x1FF;
         nr_sects = req->buffer_and_sects[i] &  0x1FF;
 
-        if ( nr_sects == 0 )
+        if ( unlikely(nr_sects == 0) )
         {
             DPRINTK("zero-sized data request\n");
             goto bad_descriptor;
         }
 
-        if ( !lock_buffer(p, buffer, nr_sects<<9, (operation==READ)) )
-       {
-            DPRINTK("invalid buffer\n");
-            goto bad_descriptor;
-       }
-
        phys_seg[nr_psegs].dev           = req->device;
        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
        phys_seg[nr_psegs].buffer        = buffer;
@@ -480,7 +475,6 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
                         req->sector_number + tot_sects, 
                         req->sector_number + tot_sects + nr_sects, 
                         req->device); 
-                unlock_buffer(buffer, nr_sects<<9, (operation==READ));
                 goto bad_descriptor;
             }
 
@@ -494,7 +488,22 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
         }
         
         nr_psegs += new_segs;
-        if ( nr_psegs >= (MAX_BLK_SEGS*2) ) BUG();
+        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
+    }
+
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        if ( unlikely(!lock_buffer(p, phys_seg[i].buffer, 
+                                   phys_seg[i].nr_sects << 9,
+                                   operation==READ)) )
+       {
+            DPRINTK("invalid buffer\n");
+            while ( i-- > 0 )
+                unlock_buffer(phys_seg[i].buffer, 
+                              phys_seg[i].nr_sects << 9,
+                              operation==READ);
+            goto bad_descriptor;
+       }
     }
 
     atomic_inc(&nr_pending);
@@ -512,8 +521,9 @@ static void dispatch_rw_block_io(struct task_struct *p, int index)
     for ( i = 0; i < nr_psegs; i++ )
     {
         bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
-        if ( bh == NULL ) panic("bh is null\n");
-        memset (bh, 0, sizeof (struct buffer_head));
+        if ( unlikely(bh == NULL) )
+            panic("bh is null\n");
+        memset(bh, 0, sizeof (struct buffer_head));
     
         bh->b_size          = phys_seg[i].nr_sects << 9;
         bh->b_dev           = phys_seg[i].dev;
index 91d6a4e0cf6a525040885d13f5fba8f919565be9..1ec0b5d8cfff02a5e3c8e06c54ca4cd358501353 100644 (file)
@@ -522,6 +522,8 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
     old_page = &frame_table[rx->buf_pfn];
     new_page = skb->pf;
     
+    skb->pf = old_page;
+
     ptep = map_domain_mem(rx->pte_ptr);
 
     new_page->u.domain = p;
@@ -541,6 +543,8 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
                           ((new_page - frame_table) << PAGE_SHIFT))) != pte )
     {
         unmap_domain_mem(ptep);
+        /* At some point maybe should have 'new_page' in error response. */
+        put_page_and_type(new_page);
         status = RING_STATUS_BAD_PAGE;
         goto out;
     }
@@ -550,9 +554,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
     
     unmap_domain_mem(ptep);
 
-    /* Our skbuff now points at the guest's old frame. */
-    skb->pf = old_page;
-
     /* Updates must happen before releasing the descriptor. */
     smp_wmb();
 
@@ -2078,17 +2079,13 @@ static void get_rx_bufs(net_vif_t *vif)
          * just once as a writeable page.
          */
         if ( unlikely(buf_page->u.domain != p) ||
-             unlikely(!test_and_clear_bit(_PGC_allocated, 
-                                          &buf_page->count_and_flags)) ||
              unlikely(cmpxchg(&buf_page->type_and_flags, 
                               PGT_writeable_page|PGT_validated|1,
                               0) != (PGT_writeable_page|PGT_validated|1)) )
         {
             DPRINTK("Bad domain or page mapped writeable more than once.\n");
-            if ( buf_page->u.domain == p )
-                set_bit(_PGC_allocated, &buf_page->count_and_flags);
-            if ( unlikely(cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
-                          (pte & ~_PAGE_PRESENT)) )
+            if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) != 
+                 (pte & ~_PAGE_PRESENT) )
                 put_page_and_type(buf_page);
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
@@ -2099,11 +2096,17 @@ static void get_rx_bufs(net_vif_t *vif)
          * The final count should be 2, because of PGC_allocated.
          */
         if ( unlikely(cmpxchg(&buf_page->count_and_flags, 
-                              PGC_tlb_flush_on_type_change | 2, 0) != 
-                      (PGC_tlb_flush_on_type_change | 2)) )
+                              PGC_allocated | PGC_tlb_flush_on_type_change | 2,
+                              0) != 
+                      (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
         {
-            DPRINTK("Page held more than once\n");
-            /* Leave the page unmapped at 'ptep'. Stoopid domain! */
+            DPRINTK("Page held more than once %08lx\n", 
+                    buf_page->count_and_flags);
+            if ( get_page_type(buf_page, PGT_writeable_page) &&
+                 (cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
+                  (pte & ~_PAGE_PRESENT)) )
+                put_page_and_type(buf_page);
+            /* NB. If we fail to remap the page, we should probably flag it. */
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
         }